/// Element-wise in-place addition over a fixed range: a[i] = a[i] + b[i]
/// for every i in [0, 256).
///
/// @tparam UNROLL_TIMES  Compile-time integer forwarded to
///                       `__builtin_force_unroll` inside the loop body.
/// @param a  Read and written at indices 0..255, so it must point to at
///           least 256 doubles.
/// @param b  Read at indices 0..255, so it must point to at least 256 doubles.
///
/// Both pointers are `__restrict__`-qualified, i.e. the caller promises that
/// the two ranges do not alias.
template<int UNROLL_TIMES>
void add(double  *__restrict__ a, double *__restrict__ b){
  // The trip count is the hard-coded constant 256; no length is passed in.
  for (int i = 0; i < 256; i ++){
    // NOTE(review): `__builtin_force_unroll` is not declared anywhere in the
    // visible source and does not look like a mainstream GCC/Clang builtin.
    // Presumably it is a toolchain-specific intrinsic asking for the enclosing
    // loop to be unrolled UNROLL_TIMES times, and its position as the first
    // statement of the loop body may be significant -- confirm against the
    // target compiler's documentation before moving or rewriting it.
    __builtin_force_unroll(UNROLL_TIMES);
    a[i] = a[i] + b[i];
  }
}

// Explicit instantiation definitions: instantiate add<> in this translation
// unit for UNROLL_TIMES = 16 and UNROLL_TIMES = 8, even though nothing in the
// visible source calls either specialization.
template void add<16>(double *__restrict__ a, double *__restrict__ b);
template void add<8>(double *__restrict__ a, double *__restrict__ b);
